1 Load packages

pacman::p_load(ggplot2, ggExtra, gganimate, ggcorrplot, extrafont, tidyverse, tidySEM, dplyr, 
               UpSetR, plyr, gapminder, plotly, janitor, ggbreak, lavaan, patchwork, install = TRUE)

I have been collecting some useful resources for data visualization in R. Below are a few top selections from my exploration.

2 Correlation using correlogram

You could move beyond corration tables and have simple yet informative plots using ggcorrplot.

data(mtcars)
correl <- round(cor(mtcars), 2)

ggcorrplot(correl, hc.order = TRUE, 
           type = "upper", 
           lab = TRUE, 
           lab_size = 3, 
           method="square",
           colors = c("#B95C50",  "#FFFFFF", "#5B7586"), 
           title="Correlogram of mtcars", 
           # ggtheme=theme_light
           # ggtheme=hrbrthemes::theme_ipsum()
           )

2.1 UpsetR plots

Ever wanted to substitute your overcrowded venn diagrams with something more fancy? Well, there is an option for you in UpsetR plots. Other than the clean look of these plots, they also allow for additional metadata to be visualised alongside the intersections/shared variables in a dataset.

movies <- read.csv(system.file("extdata", "movies.csv", package = "UpSetR"),
                   header = T, sep = ";")


upset(movies, main.bar.color = "black",
      queries = list(list(query = intersects,
                          params = list("Drama"),
                          active = T)),
      attribute.plots = list(gridrows = 50,
                             plots = list(list(plot = histogram,
                                               x = "ReleaseDate", queries = F),
                                          list(plot = histogram,
                                               x = "AvgRating", queries = T)),
                             ncols = 2))

upset(movies,
      main.bar.color = "black",
      queries = list(list(query = intersects,
                          params = list("Drama"),
                          color = "red", active = F),
                     list(query = intersects,
                          params = list("Action", "Drama"), active = T),
                     list(query = intersects,
                          params = list("Drama", "Comedy", "Action"),
                          color = "orange", active = T)),
      attribute.plots = list(gridrows = 45,
                             plots = list(list(plot = scatter_plot,
                                               x = "ReleaseDate",
                                               y = "AvgRating",
                                               queries = T),
                                          list(plot = scatter_plot,
                                               x = "AvgRating",
                                               y = "Watches",
                                               queries = F)),
                             ncols = 2),
      query.legend = "bottom")

myplot <- function(mydata, x, y) {
   plot <- (ggplot(data = mydata, aes_string(x = x, y = y, colour = "color")) +
               geom_point() +
               scale_color_identity() +
               theme(plot.margin = unit(c(0, 0, 0, 0), "cm")))
}

another.plot <- function(data, x, y) {
   data$decades <- round_any(as.integer(unlist(data[y])), 10, ceiling)
   data <- data[which(data$decades >= 1970), ]
   myplot <- (ggplot(data, aes_string(x = x)) +
                 geom_density(aes(fill = factor(decades)),
                              alpha = 0.4) +
                 theme(plot.margin = unit(c(0, 0, 0, 0), "cm"),
                       legend.key.size = unit(0.4, "cm")))
}

# Example of applying the myplot custom attribute plot defined above to the data.

upset(movies, main.bar.color = "black",
      queries = list(list(query = intersects,
                          params = list("Drama"),
                          color = "red",
                          active = F),
                     list(query = intersects,
                          params = list("Action", "Drama"),
                          active = T),
                     list(query = intersects,
                          params = list("Drama", "Comedy", "Action"),
                          color = "orange",
                          active = T)),
      attribute.plots = list(gridrows = 45,
                             plots = list(list(plot = myplot,
                                               x = "ReleaseDate",
                                               y = "AvgRating",
                                               queries = T),
                                          list(plot = another.plot,
                                               x = "AvgRating",
                                               y = "ReleaseDate",
                                               queries = F)),
                             ncols = 2))

3 Breathing some life into plots

3.1 Plotly visualizations for shiny and web applications

Web-based interactive charting is gaining popularity and no other package IMO does a better job like plotly. With this beast, you can plot beyond the ggplot2 API capabilities

g <- ggplot(faithful, aes(x = eruptions, y = waiting)) +
stat_density_2d(aes(fill = ..level..), geom = "polygon") +
xlim(1, 6) + ylim(40, 100)

gg <- ggplotly(g, dynamicTicks = "y")
style(gg, hoveron = "points", hoverinfo = "x+y+text", hoverlabel = list(bgcolor = "white"))
gapminder %>%
   plot_ly(
      x = ~gdpPercap,
      y = ~lifeExp,
      size = ~pop,
      color = ~continent,
      frame = ~year,
      text = ~country,
      hoverinfo = "text",
      type = 'scatter',
      mode = 'markers'
   ) %>%
   layout(
      xaxis = list(
         type = "log"
      )
   )

3.2 Gganimate allows for transition through distinct variables

Like plotly, gganimate allows for automated animation of ggplot2-like graphics. See more examples here

Starting with a generic ggplot2 plot:

(p <- ggplot(
  gapminder, 
  aes(x = gdpPercap, y=lifeExp, size = pop, colour = country)
  ) +
  geom_point(show.legend = FALSE, alpha = 0.7) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  labs(x = "GDP per capita", y = "Life expectancy") +
  hrbrthemes::theme_ipsum())

We can add some animation to teh plot. Since RMarkdown isn’t going to render the animation, we first need to save a GIF which can then be rendered outside the chunk

p1 <- p + transition_time(year) +
  labs(title = "Year: {frame_time}")
anim_save("p1.gif", p1)

4 Population Pyramids and Diverging Lollipop Charts

You could read about the nuts and bolts of this fun plot here

email_campaign_funnel <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")
brks <- seq(-15000000, 15000000, 5000000)
lbls = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")

# Plot
ggplot(email_campaign_funnel, aes(x = Stage, y = Users, fill = Gender)) +   # Fill column
                              geom_bar(stat = "identity", width = .6) +   # draw the bars
                              scale_y_continuous(breaks = brks,   # Breaks
                                                 labels = lbls) + # Labels
                              coord_flip() +  # Flip axes
                              labs(title="Email Campaign Funnel") +
                              ggthemes::theme_excel_new() + 
                              # hrbrthemes::theme_ipsum() +
                              theme(plot.title = element_text(hjust = .5), 
                                    axis.ticks = element_blank()) +   # Centre plot title
                              scale_fill_manual(values = c("#B95C50", "#5B7586"))

# Data Prep
data("mtcars")  # load data
mtcars$`car name` <- rownames(mtcars)  # create new column for car names
mtcars$mpg_z <- round((mtcars$mpg - mean(mtcars$mpg))/sd(mtcars$mpg), 2)  # compute normalized mpg
mtcars$mpg_type <- ifelse(mtcars$mpg_z < 0, "below", "above")  # above / below avg flag
mtcars <- mtcars[order(mtcars$mpg_z), ]  # sort
mtcars$`car name` <- factor(mtcars$`car name`, levels = mtcars$`car name`)  # convert to factor to retain sorted order in plot.

ggplot(mtcars, aes(x=`car name`, y=mpg_z, label=mpg_z)) + 
  geom_point(stat='identity', fill="black", size=6)  +
  geom_segment(aes(y = 0, 
                   x = `car name`, 
                   yend = mpg_z, 
                   xend = `car name`), 
               color = "black") +
  geom_text(color="white", size=2) +
  labs(title="Diverging Lollipop Chart", 
       subtitle="Normalized mileage from 'mtcars': Lollipop") + 
  ylim(-2.5, 2.5) +
  coord_flip()

5 Structural Equation Modeling (SEM) and Confirmatory Factor Analysis (CFA)

Some very smart humans have talked in detail about SEM and CFA here. SEM technique is the combination of factor analysis and multiple regression analysis, and it is used to analyze the structural relationship between measured variables and latent constructs. CFA on the other hand borrows many of the same concepts from SEM and exploratory factor analysis. However, instead of letting the data tell us the factor structure, we apply pre-determineed factor structure and verify the psychometric structure of a previously developed scale. This of it as a particular case of SEM for model evaluation. It is a process which consists in specifying quantity and kinds of observed variables to one or more latent variables and analyze how well those variables measure the latent variable itself. Think of a latent variable as an artificial variable that is represented as a linear combination of observed variables.

You’ve fit structural equations in observed variable models and latent variables, how do you best present this data? SEM tables look horrible and unreadable by the way!

Enter the SEM graphs..

HS.model <- ' visual  =~ x1 + x2 + x3
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '
fit <- cfa(HS.model, data=HolzingerSwineford1939)
graph_sem(model = fit)

lay <- get_layout("", "", "visual","","textual","","speed","", "",
                  "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", rows = 2)

graph_sem(fit, layout = lay)

semPlot::semPaths(fit, "std", curvePivot = TRUE, intercepts = TRUE, intStyle = "multi")

6 Patchwork over grid

Patchwork makes alignment of ggplot2 objects seamless. You should try it sometime..

iris1 <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot()

iris2 <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.7) +
  theme(legend.position = c(0.8, 0.8))

p1 <- ggplot(mtcars) + geom_point(aes(mpg, disp))
p2 <- ggplot(mtcars) + geom_boxplot(aes(gear, disp, group = gear))

x <- rnorm(100)
eps <- rnorm(100,0,.2)
p3 <- qplot(x,3*x+eps)

6.0.1 Side by Side? use “|” or “+”

(iris1|iris2)

iris1+iris2

6.0.2 Stack? use “/”

(iris1/iris2)

6.0.3 Grouped

(iris1|iris2)/p1/(p2|p3)

6.0.4 Annotate (generic)

(iris1|iris2)/p1/(p2|p3) + 
  plot_annotation(tag_levels = 'A')

6.0.5 Annotate (multilevel)

(iris1|iris2)/(p1+p2+p3+ plot_layout(tag_level = 'new')) +
  plot_annotation(tag_levels = c('A',"1"))

I have used SEM and CFA in some published real-life data < href=“https://raw.githubusercontent.com/fmobegi/TSI-Data-Analysis/master/TSI_data_analysis.html” target="_blank">before.

7 Session Information

All package versions used in this run are listed below.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.1252  LC_CTYPE=English_Australia.1252   
## [3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.1.2      lavaan_0.6-12        ggbreak_0.1.0       
##  [4] janitor_2.1.0        plotly_4.10.0        gapminder_0.3.0     
##  [7] plyr_1.8.7           UpSetR_1.4.0         tidySEM_0.2.3       
## [10] OpenMx_2.20.6        forcats_0.5.2        stringr_1.4.1       
## [13] dplyr_1.0.9          purrr_0.3.4          readr_2.1.2         
## [16] tidyr_1.2.0          tibble_3.1.8         tidyverse_1.3.2     
## [19] extrafont_0.18       ggcorrplot_0.1.3     gganimate_1.0.7.9000
## [22] ggExtra_0.10.0       ggplot2_3.3.6       
## 
## loaded via a namespace (and not attached):
##   [1] pacman_0.5.1          utf8_1.2.2            proto_1.0.0          
##   [4] tidyselect_1.1.2      lme4_1.1-30           htmlwidgets_1.5.4    
##   [7] grid_4.1.2            munsell_0.5.0         codetools_0.2-18     
##  [10] interp_1.1-3          future_1.27.0         miniUI_0.1.1.1       
##  [13] withr_2.5.0           colorspace_2.0-3      highr_0.9            
##  [16] knitr_1.40            rstudioapi_0.14       stats4_4.1.2         
##  [19] Rttf2pt1_1.3.10       bayesplot_1.9.0       listenv_0.8.0        
##  [22] labeling_0.4.2        mi_1.1                rstan_2.21.5         
##  [25] mnormt_2.1.0          farver_2.1.1          coda_0.19-4          
##  [28] parallelly_1.32.1     vctrs_0.4.1           generics_0.1.3       
##  [31] xfun_0.32             ggthemes_4.2.4        R6_2.5.1             
##  [34] arm_1.13-1            isoband_0.2.5         cachem_1.0.6         
##  [37] gridGraphics_0.5-1    assertthat_0.2.1      promises_1.2.0.1     
##  [40] scales_1.2.1          nnet_7.3-16           googlesheets4_1.0.1  
##  [43] texreg_1.38.6         gtable_0.3.0          globals_0.16.1       
##  [46] processx_3.7.0        sandwich_3.0-2        rlang_1.0.4          
##  [49] systemfonts_1.0.4     splines_4.1.2         extrafontdb_1.0      
##  [52] lazyeval_0.2.2        gargle_1.2.0          broom_1.0.1          
##  [55] checkmate_2.1.0       inline_0.3.19         abind_1.4-5          
##  [58] yaml_2.3.5            reshape2_1.4.4        modelr_0.1.9         
##  [61] crosstalk_1.2.0       backports_1.4.1       httpuv_1.6.5         
##  [64] Hmisc_4.7-1           tools_4.1.2           psych_2.2.5          
##  [67] ggplotify_0.1.0       ellipsis_0.3.2        jquerylib_0.1.4      
##  [70] RColorBrewer_1.1-3    ggridges_0.5.3        gsubfn_0.7           
##  [73] Rcpp_1.0.9            base64enc_0.1-3       progress_1.2.2       
##  [76] rockchalk_1.8.157     ps_1.7.1              prettyunits_1.1.1    
##  [79] rpart_4.1-15          deldir_1.0-6          pbapply_1.5-0        
##  [82] zoo_1.8-10            cluster_2.1.2         qgraph_1.9.2         
##  [85] haven_2.5.1           hrbrthemes_0.8.0      fs_1.5.2             
##  [88] magrittr_2.0.3        data.table_1.14.2     magick_2.7.3         
##  [91] openxlsx_4.2.5        nonnest2_0.5-5        reprex_2.0.2         
##  [94] googledrive_2.0.0     tmvnsim_1.0-2         mvtnorm_1.1-3        
##  [97] matrixStats_0.62.0    hms_1.1.2             mime_0.12            
## [100] evaluate_0.16         xtable_1.8-4          XML_3.99-0.10        
## [103] jpeg_0.1-9            readxl_1.4.1          fastDummies_1.6.3    
## [106] gridExtra_2.3         rstantools_2.2.0      compiler_4.1.2       
## [109] crayon_1.5.1          minqa_1.2.4           StanHeaders_2.21.0-7 
## [112] htmltools_0.5.3       corpcor_1.6.10        ggfun_0.0.7          
## [115] later_1.3.0           tzdb_0.3.0            Formula_1.2-4        
## [118] aplot_0.1.6           RcppParallel_5.1.5    lubridate_1.8.0      
## [121] DBI_1.1.3             tweenr_2.0.1          kutils_1.70          
## [124] dbplyr_2.2.1          MASS_7.3-54           boot_1.3-28          
## [127] Matrix_1.3-4          cli_3.3.0             parallel_4.1.2       
## [130] igraph_1.3.4          pkgconfig_2.0.3       sem_3.1-15           
## [133] foreign_0.8-81        xml2_1.3.3            pbivnorm_0.6.0       
## [136] bslib_0.4.0           CompQuadForm_1.4.3    rvest_1.0.3          
## [139] snakecase_0.11.0      yulab.utils_0.0.5     callr_3.7.2          
## [142] digest_0.6.29         semPlot_1.1.6         rmarkdown_2.16       
## [145] cellranger_1.1.0      htmlTable_2.4.1       lisrelToR_0.1.5      
## [148] gdtools_0.2.4         gtools_3.9.3          shiny_1.7.2          
## [151] nloptr_2.0.3          glasso_1.11           lifecycle_1.0.1      
## [154] nlme_3.1-153          jsonlite_1.8.0        carData_3.0-5        
## [157] viridisLite_0.4.1     fansi_1.0.3           pillar_1.8.1         
## [160] lattice_0.20-45       loo_2.5.1             survival_3.2-13      
## [163] fastmap_1.1.0         httr_1.4.4            pkgbuild_1.3.1       
## [166] glue_1.6.2            fdrtool_1.2.17        zip_2.2.0            
## [169] MplusAutomation_1.1.0 png_0.1-7             pander_0.6.5         
## [172] stringi_1.7.8         sass_0.4.2            blavaan_0.4-3        
## [175] latticeExtra_0.6-30   future.apply_1.9.0

This script was last updated on:

## Thu 01 September 2022, 12:20:01, AWST (Australia/Perth)

………………. THE END ………………….

By: Dr Fredrick M. Mobegi (BSc: Hons, MSc, PhD)
Created: 24-08-2022 Wed 12:13
Copyright © 2022 | Fredrick Mobegi; PathWest Laboratory Medicine WA | This notebook is for research reference purposes only and it may contains links to embargoed or legally privileged data. Except as permitted by the copyright law applicable to you, you may not reproduce or communicate any of the content on this page, including files downloadable from this page, without written permission of the copyright owner(s).